# load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
library(ggthemes)
library(viridis)
## Loading required package: viridisLite
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:viridis':
##
## viridis_pal
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(wrapr)
##
## Attaching package: 'wrapr'
##
## The following objects are masked from 'package:data.table':
##
## :=, let
##
## The following object is masked from 'package:dplyr':
##
## coalesce
##
## The following objects are masked from 'package:tidyr':
##
## pack, unpack
##
## The following object is masked from 'package:tibble':
##
## view
library(tidyplots)
# load tables
gnomAD_main_table_full_cCREs <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs.rds")
gnomAD_main_table_full_cCREs_in_TF <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_in_TF.rds")
gnomAD_main_table_full_cCREs_in_rep <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_in_rep.rds")
gnomAD_main_table_full_emVar_cat <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_emVar_cat.rds")
gnomAD_main_table_full_cCREs_emVar_cat <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_emVar_cat.rds")
gnomAD_main_table_full_cCREs_pleio <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_pleio.rds")
gnomAD_main_table_full_cCREs_pleio_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_pleio_mean.rds")
gnomAD_main_table_full_CADD <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_CADD.rds")
gnomAD_main_table_full_cCREs_CADD <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_CADD.rds")
gnomAD_main_table_full_cCREs_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean.rds")
gnomAD_main_table_full_cCREs_mean_in_TF <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean_in_TF.rds")
gnomAD_main_table_full_cCREs_mean_in_rep <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean_in_rep.rds")
gnomAD_main_table_full_cCREs_K562 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562.rds")
gnomAD_main_table_full_cCREs_K562_in_TF <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_in_TF.rds")
gnomAD_main_table_full_cCREs_K562_in_rep <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_in_rep.rds")
gnomAD_main_table_full_cCREs_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2.rds")
gnomAD_main_table_full_cCREs_HepG2_in_TF <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_in_TF.rds")
gnomAD_main_table_full_cCREs_HepG2_in_rep <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_in_rep.rds")
gnomAD_main_table_full_cCREs_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH.rds")
gnomAD_main_table_full_cCREs_SKNSH_in_TF <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH_in_TF.rds")
gnomAD_main_table_full_cCREs_SKNSH_in_rep <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH_in_rep.rds")
gnomAD_main_table_full_cCREs_mean_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean_ref.rds")
gnomAD_main_table_full_cCREs_K562_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_ref.rds")
gnomAD_main_table_full_cCREs_HepG2_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_ref.rds")
gnomAD_main_table_full_cCREs_SKNSH_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH_ref.rds")
gnomAD_main_table_full_in_TF_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_mean.rds")
gnomAD_main_table_full_in_TF_K562 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_K562.rds")
gnomAD_main_table_full_in_TF_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_HepG2.rds")
gnomAD_main_table_full_in_TF_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_SKNSH.rds")
gnomAD_main_table_full_in_TF_mean_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_mean_ref.rds")
gnomAD_main_table_full_in_TF_K562_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_K562_ref.rds")
gnomAD_main_table_full_in_TF_HepG2_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_HepG2_ref.rds")
gnomAD_main_table_full_in_TF_SKNSH_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_TF_SKNSH_ref.rds")
gnomAD_main_table_full_in_rep_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_mean.rds")
gnomAD_main_table_full_in_rep_K562 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_K562.rds")
gnomAD_main_table_full_in_rep_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_HepG2.rds")
gnomAD_main_table_full_in_rep_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_SKNSH.rds")
gnomAD_main_table_full_in_rep_mean_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_mean_ref.rds")
gnomAD_main_table_full_in_rep_K562_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_K562_ref.rds")
gnomAD_main_table_full_in_rep_HepG2_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_HepG2_ref.rds")
gnomAD_main_table_full_in_rep_SKNSH_ref <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_in_rep_SKNSH_ref.rds")
gnomAD_main_table_full_cCREs_category <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_category.rds")
gnomAD_main_table_full_cCREs_mean_category <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean_category.rds")
gnomAD_main_table_full_cCREs_K562_category <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_category.rds")
gnomAD_main_table_full_cCREs_HepG2_category <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_category.rds")
gnomAD_main_table_full_cCREs_SKNSH_category <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH_category.rds")
gnomAD_main_table_full_cCREs_mean_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_mean_mean.rds")
gnomAD_main_table_full_cCREs_K562_K562 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_K562.rds")
gnomAD_main_table_full_cCREs_HepG2_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_HepG2.rds")
gnomAD_main_table_full_cCREs_SKNSH_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_SKNSH_SKNSH.rds")
gnomAD_main_table_full_cCREs_emVar_cat_mean <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_emVar_cat_mean.rds")
gnomAD_main_table_full_cCREs_emVar_cat_K562 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_emVar_cat_K562.rds")
gnomAD_main_table_full_cCREs_emVar_cat_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_emVar_cat_HepG2.rds")
gnomAD_main_table_full_cCREs_emVar_cat_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_emVar_cat_SKNSH.rds")
gnomAD_main_table_full_cCREs_K562_HepG2 <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_HepG2.rds")
gnomAD_main_table_full_cCREs_K562_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_K562_SKNSH.rds")
gnomAD_main_table_full_cCREs_HepG2_SKNSH <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table_full_cCREs_HepG2_SKNSH.rds")
gnomAD_main_table <- readRDS("../../results/gnomAD_selection_preprocess/gnomAD_main_table.rds")
# how many emVars
gnomAD_main_table_summ_emVar <- gnomAD_main_table %>%
group_by(emVar_category) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_emVar %>% filter(emVar_category != "none") %>%
tidyplot(x = emVar_category, y = count, color = emVar_category) %>%
add_barstack_absolute() %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3) +
xlab("emVar category") +
ylab("Count") +
ylim(c(0, 3500000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggsave("../../results/gnomAD_selection_analysis/how_many_emVar.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
# how many emVars per cCRE
gnomAD_main_table_summ_emVar_cCRE <- gnomAD_main_table %>%
mutate(emVar = ifelse(emVar_category == "none", FALSE, TRUE)) %>%
group_by(emVar, ENCODE_cCREs) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
## `summarise()` has grouped output by 'emVar'. You can override using the
## `.groups` argument.
gnomAD_main_table_summ_emVar_cCRE %>%
filter(ENCODE_cCREs != "Other cCREs", emVar) %>%
tidyplot(x = ENCODE_cCREs, y = count, color = emVar) %>%
add_mean_bar() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3,
position = position_dodge(.9), show.legend = FALSE) +
xlab("ENCODE cCREs") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_emVar_cCRE.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
# how many allelic skew
gnomAD_main_table_summ_K562_skew <- gnomAD_main_table %>%
group_by(K562_skew_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_HepG2_skew <- gnomAD_main_table %>%
group_by(HepG2_skew_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_SKNSH_skew <- gnomAD_main_table %>%
group_by(SKNSH_skew_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_mean_skew <- gnomAD_main_table %>%
group_by(mean_skew_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_K562_skew %>%
tidyplot(x = K562_skew_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#00A79D") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#00A79D") +
xlab("K562 allelic skew") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_K562_skew.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_HepG2_skew %>%
tidyplot(x = HepG2_skew_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#FBB040") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#FBB040") +
xlab("HepG2 allelic skew") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_HepG2_skew.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_SKNSH_skew %>%
tidyplot(x = SKNSH_skew_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#ED1C24") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#ED1C24") +
xlab("SKNSH allelic skew") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_SKNSH_skew.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_mean_skew %>%
tidyplot(x = mean_skew_pred_bin, y = count) %>%
add_barstack_absolute() %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3) +
xlab("Mean allelic skew") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_mean_skew.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
nonsig_skew <- c("[-0.5, -0.2)", "[-0.2, -0.05)", "[-0.05, 0.05)", "[0.05, 0.2)", "[0.2, 0.5)")
a <- sum(filter(gnomAD_main_table, (K562_skew_pred_bin %in% nonsig_skew) & (HepG2_skew_pred_bin %in% nonsig_skew) & (SKNSH_skew_pred_bin %in% nonsig_skew)
)$count)
b <- sum(filter(gnomAD_main_table, !((K562_skew_pred_bin %in% nonsig_skew) & (HepG2_skew_pred_bin %in% nonsig_skew) & (SKNSH_skew_pred_bin %in% nonsig_skew)
))$count)
a
## [1] 504507736
b
## [1] 9378521
b/(a+b)
## [1] 0.01825019
# how many ref active
gnomAD_main_table_summ_K562_ref <- gnomAD_main_table %>%
group_by(K562_ref_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_HepG2_ref <- gnomAD_main_table %>%
group_by(HepG2_ref_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_SKNSH_ref <- gnomAD_main_table %>%
group_by(SKNSH_ref_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_mean_ref <- gnomAD_main_table %>%
group_by(mean_ref_pred_bin) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(prop = count/sum(count))
gnomAD_main_table_summ_K562_ref %>%
tidyplot(x = K562_ref_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#00A79D") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#00A79D") +
xlab("K562 ref activity") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_K562_ref.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_HepG2_ref %>%
tidyplot(x = HepG2_ref_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#FBB040") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#FBB040") +
xlab("HepG2 ref activity") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_HepG2_ref.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_SKNSH_ref %>%
tidyplot(x = SKNSH_ref_pred_bin, y = count) %>%
add_barstack_absolute(fill = "#ED1C24") %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3, color = "#ED1C24") +
xlab("SKNSH ref activity") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_SKNSH_ref.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_summ_mean_ref %>%
tidyplot(x = mean_ref_pred_bin, y = count) %>%
add_barstack_absolute() %>%
remove_legend() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3) +
xlab("Mean ref activity") +
ylab("Count")

ggsave("../../results/gnomAD_selection_analysis/how_many_mean_ref.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
nonsig_ref <- c("[-Inf, 1)")
a <- sum(filter(gnomAD_main_table, (K562_ref_pred_bin %in% nonsig_ref) & (HepG2_ref_pred_bin %in% nonsig_ref) & (SKNSH_ref_pred_bin %in% nonsig_ref)
)$count)
b <- sum(filter(gnomAD_main_table, !((K562_ref_pred_bin %in% nonsig_ref) & (HepG2_ref_pred_bin %in% nonsig_ref) & (SKNSH_ref_pred_bin %in% nonsig_ref)
))$count)
a
## [1] 462431266
b
## [1] 51454991
b/(a+b)
## [1] 0.1001291
emVar_bins <- c(" (-Inf, -1.5)", "[-1.5, -1.0)", "[-1.0, -0.5)", "[0.5, 1.0)", "[1.0, 1.5)", "[1.5, Inf)")
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "PLS") %>% filter((K562_skew_pred_bin %in% emVar_bins) | (HepG2_skew_pred_bin %in% emVar_bins) | (SKNSH_skew_pred_bin %in% emVar_bins)) %>% .$count)/
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "PLS") %>% .$count)
## [1] 0.08825359
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "pELS") %>% filter((K562_skew_pred_bin %in% emVar_bins) | (HepG2_skew_pred_bin %in% emVar_bins) | (SKNSH_skew_pred_bin %in% emVar_bins)) %>% .$count)/
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "pELS") %>% .$count)
## [1] 0.04156389
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "dELS") %>% filter((K562_skew_pred_bin %in% emVar_bins) | (HepG2_skew_pred_bin %in% emVar_bins) | (SKNSH_skew_pred_bin %in% emVar_bins)) %>% .$count)/
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "dELS") %>% .$count)
## [1] 0.03621046
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "non-cCRE") %>% filter((K562_skew_pred_bin %in% emVar_bins) | (HepG2_skew_pred_bin %in% emVar_bins) | (SKNSH_skew_pred_bin %in% emVar_bins)) %>% .$count)/
sum(gnomAD_main_table %>% filter(ENCODE_cCREs == "non-cCRE") %>% .$count)
## [1] 0.01294262
gnomAD_main_table_full_cCREs_emVar_cat_phyloP <- gnomAD_main_table_full_cCREs_emVar_cat %>%
mutate(phyloP_cons = count*phyloP_cons_prop, phyloP_noncons = count*(1-phyloP_cons_prop)) %>%
dplyr::select(ENCODE_cCREs, emVar_category, phyloP_cons, phyloP_noncons) %>%
pivot_longer(cols = c(phyloP_cons, phyloP_noncons), names_to = "phyloP", values_to = "count") %>%
mutate(phyloP = gsub("phyloP_", "", phyloP)) %>%
mutate(ENCODE_cCREs_phyloP = paste(ENCODE_cCREs, phyloP, sep="_")) %>%
group_by(ENCODE_cCREs_phyloP, ENCODE_cCREs, phyloP) %>%
mutate(count_prop = count/sum(count), base = sum(count)) %>%
ungroup() %>%
filter(emVar_category != "none") %>%
mutate(ENCODE_cCREs_phyloP = factor(ENCODE_cCREs_phyloP, levels = c("PLS_noncons", "PLS_cons", "pELS_noncons", "pELS_cons", "dELS_noncons", "dELS_cons", "non-cCRE_noncons", "non-cCRE_cons"))) %>%
mutate(emVar_category = factor(emVar_category, levels = c("K562", "HepG2", "SKNSH", "K562+HepG2", "K562+SKNSH", "SKNSH+HepG2", "K562+SKNSH+HepG2")))
write_tsv(gnomAD_main_table_full_cCREs_emVar_cat_phyloP, "../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_cat_phyloP_stacked.txt")
gnomAD_main_table_full_cCREs_emVar_cat_phyloP %>%
group_by(ENCODE_cCREs, phyloP, base) %>%
summarise(
count = sum(count),
count_prop = sum(count_prop)
) %>%
ungroup()
## `summarise()` has grouped output by 'ENCODE_cCREs', 'phyloP'. You can override
## using the `.groups` argument.
## # A tibble: 10 × 5
## ENCODE_cCREs phyloP base count count_prop
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 PLS cons 273419 48588 0.178
## 2 PLS noncons 1569996 114100 0.0727
## 3 pELS cons 646903 50374 0.0779
## 4 pELS noncons 10196308 400312 0.0393
## 5 dELS cons 3684209 160162 0.0435
## 6 dELS noncons 69919240 2505053 0.0358
## 7 non-cCRE cons 5611880 63540 0.0113
## 8 non-cCRE noncons 392563240 5089889 0.0130
## 9 Other cCREs cons 839925 20759 0.0247
## 10 Other cCREs noncons 28581137 844381 0.0295
gnomAD_main_table_full_cCREs_emVar_phyloP <- gnomAD_main_table_full_cCREs_emVar_cat %>%
mutate(phyloP_cons = count*phyloP_cons_prop, phyloP_noncons = count*(1-phyloP_cons_prop)) %>%
dplyr::select(ENCODE_cCREs, emVar_category, phyloP_cons, phyloP_noncons) %>%
pivot_longer(cols = c(phyloP_cons, phyloP_noncons), names_to = "phyloP", values_to = "count") %>%
mutate(phyloP = gsub("phyloP_", "", phyloP)) %>%
group_by(phyloP) %>%
mutate(count_prop = count/sum(count), base = sum(count)) %>%
ungroup() %>%
filter(emVar_category != "none") %>%
mutate(emVar_category = factor(emVar_category, levels = c("K562", "HepG2", "SKNSH", "K562+HepG2", "K562+SKNSH", "SKNSH+HepG2", "K562+SKNSH+HepG2")))
write_tsv(gnomAD_main_table_full_cCREs_emVar_phyloP, "../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_stacked.txt")
gnomAD_main_table_full_cCREs_emVar_phyloP %>%
group_by(phyloP, base) %>%
summarise(
count = sum(count),
count_prop = sum(count_prop)
) %>%
ungroup()
## `summarise()` has grouped output by 'phyloP'. You can override using the
## `.groups` argument.
## # A tibble: 2 × 4
## phyloP base count count_prop
## <chr> <dbl> <dbl> <dbl>
## 1 cons 11056336 343423 0.0311
## 2 noncons 502829921 8953735 0.0178
gnomAD_main_table_full_cCREs_emVar_cat_phyloP_cCRE <- gnomAD_main_table_full_cCREs_emVar_cat_phyloP %>%
group_by(ENCODE_cCREs, phyloP, base) %>%
summarise(
count = sum(count),
count_prop = sum(count_prop)
) %>%
ungroup()
## `summarise()` has grouped output by 'ENCODE_cCREs', 'phyloP'. You can override
## using the `.groups` argument.
gnomAD_main_table_full_cCREs_emVar_phyloP_gw <- gnomAD_main_table_full_cCREs_emVar_phyloP %>%
group_by(phyloP, base) %>%
summarise(
count = sum(count),
count_prop = sum(count_prop)
) %>%
ungroup() %>%
mutate(ENCODE_cCREs = "Genome-wide")
## `summarise()` has grouped output by 'phyloP'. You can override using the
## `.groups` argument.
gnomAD_main_table_full_cCREs_emVar_phyloP_prop <- bind_rows(
gnomAD_main_table_full_cCREs_emVar_phyloP_gw,
gnomAD_main_table_full_cCREs_emVar_cat_phyloP_cCRE
)
write_tsv(gnomAD_main_table_full_cCREs_emVar_phyloP_prop, "../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_prop.txt")
gnomAD_main_table_full_cCREs_emVar_phyloP_prop %>%
mutate(ENCODE_cCREs = factor(ENCODE_cCREs, levels = c("PLS", "pELS", "dELS", "non-cCRE", "Genome-wide"))) %>%
mutate(phyloP = ifelse(phyloP == "cons", TRUE, FALSE)) %>%
filter(ENCODE_cCREs != "Other cCREs") %>%
tidyplot(x = ENCODE_cCREs, y = count_prop, color = phyloP) %>%
add_mean_bar() %>%
adjust_x_axis(rotate_labels = TRUE) +
xlab("ENCODE cCREs") +
ylab("emVar proportion") +
labs(fill = "Constrained") +
theme(aspect.ratio = 1.6, strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_prop.pdf", scale = 0.7)
## Saving 4.9 x 3.5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_full_cCREs_emVar_phyloP_prop %>%
mutate(ENCODE_cCREs = factor(ENCODE_cCREs, levels = c("PLS", "pELS", "dELS", "non-cCRE", "Genome-wide"))) %>%
mutate(phyloP = ifelse(phyloP == "cons", TRUE, FALSE)) %>%
filter(ENCODE_cCREs != "Other cCREs") %>%
tidyplot(x = ENCODE_cCREs, y = count_prop, color = phyloP) %>%
add_mean_bar() %>%
adjust_x_axis(rotate_labels = TRUE) +
xlab("ENCODE cCREs") +
ylab("emVar proportion") +
labs(fill = "Constrained") +
theme(aspect.ratio = 1/1.01, strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_prop_alt.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_full_cCREs_emVar_phyloP_prop %>%
mutate(ENCODE_cCREs = factor(ENCODE_cCREs, levels = c("PLS", "pELS", "dELS", "non-cCRE", "Genome-wide"))) %>%
mutate(phyloP = ifelse(phyloP == "cons", TRUE, FALSE)) %>%
filter(ENCODE_cCREs != "Other cCREs") %>%
tidyplot(x = ENCODE_cCREs, y = count, color = phyloP) %>%
add_mean_bar() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3,
position = position_dodge(.9), show.legend = FALSE) +
xlab("ENCODE cCREs") +
ylab("emVar count") +
labs(fill = "Constrained") +
theme(aspect.ratio = 1.6, strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_count.pdf", scale = 0.7)
## Saving 4.9 x 3.5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
gnomAD_main_table_full_cCREs_emVar_phyloP_prop %>%
mutate(ENCODE_cCREs = factor(ENCODE_cCREs, levels = c("PLS", "pELS", "dELS", "non-cCRE", "Genome-wide"))) %>%
mutate(phyloP = ifelse(phyloP == "cons", TRUE, FALSE)) %>%
filter(ENCODE_cCREs != "Other cCREs") %>%
tidyplot(x = ENCODE_cCREs, y = count, color = phyloP) %>%
add_mean_bar() %>%
adjust_x_axis(rotate_labels = TRUE) +
geom_text(aes(label = count), angle = 90, hjust = -0.1, size = 3,
position = position_dodge(.9), show.legend = FALSE) +
xlab("ENCODE cCREs") +
ylab("emVar count") +
labs(fill = "Constrained") +
theme(aspect.ratio = 1/1.01, strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_phyloP_count_alt.pdf")
## Saving 7 x 5 in image
## Warning: 'mode(bg)' differs between new and previous
## ==> NOT changing 'bg'
ggplot(gnomAD_main_table_full_cCREs,
aes(x="", y=count, fill=ENCODE_cCREs)) + # %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_col() +
geom_text(aes(label = paste0(round(count/1e6, 1), "M")),
position = position_stack(vjust = 0.5)) +
coord_polar("y", start=0) +
theme_void() +
scale_fill_manual(values = c(PLS="#3E0751", pELS="#405187", dELS="#468E8B", `non-cCRE`="#79C66E", `Other cCREs`="grey50")) +
labs(fill = "ENCODE cCREs")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_count.pdf", scale=0.6)
## Saving 4.2 x 3 in image
ggplot(gnomAD_main_table_full_cCREs_mean_ref %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_bar(aes(x = mean_ref_pred_bin, y = count, fill = ENCODE_cCREs), stat = "identity", position = "stack") +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank(), axis.line.y = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "Mean ref activity", y = "Variants", color = "ENCODE cCREs") +
facet_grid(rows = vars(ENCODE_cCREs), scales = "free") +
theme(aspect.ratio = 1/(1.6*4), strip.background = element_blank()) +
guides(fill = "none")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_ref_count.pdf", scale=0.5)
## Saving 3.5 x 2.5 in image
ggplot(gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_bar(aes(x = mean_skew_pred_bin, y = count, fill = ENCODE_cCREs), stat = "identity", position = "stack") +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank(), axis.line.y = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "Mean allelic skew", y = "Variants", color = "ENCODE cCREs") +
facet_grid(rows = vars(ENCODE_cCREs), scales = "free") +
theme(aspect.ratio = 1/(1.6*4), strip.background = element_blank()) +
guides(fill = "none")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_skew_count.pdf", scale=0.6)
## Saving 4.2 x 3 in image
gnomAD_main_table_full_cCREs_emVar_cat <- gnomAD_main_table_full_cCREs_emVar_cat %>%
group_by(ENCODE_cCREs) %>%
mutate(count_prop = count/sum(count)) %>%
ungroup()
ggplot(gnomAD_main_table_full_cCREs_emVar_cat %>% filter(ENCODE_cCREs != "Other cCREs", emVar_category != "none")) +
geom_bar(aes(x = emVar_category, y = count_prop, fill = ENCODE_cCREs), stat = "identity", position = "stack") +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank(), axis.line.y = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "emVars (MPAC)", y = "Proportion", color = "ENCODE cCREs") +
facet_grid(rows = vars(ENCODE_cCREs), scales = "free") +
theme(aspect.ratio = 1/(1.6*4), strip.background = element_blank()) +
guides(fill = "none")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_emVar_cat_prop.pdf", scale=0.75)
## Saving 5.25 x 3.75 in image
gnomAD_cCREs_emVar_cat_ratio_shared_vs_specific <- gnomAD_main_table_full_cCREs_emVar_cat %>% filter(ENCODE_cCREs != "Other cCREs", emVar_category != "none") %>%
dplyr::select(ENCODE_cCREs, emVar_category, count) %>%
pivot_wider(names_from = emVar_category, values_from = count) %>%
mutate(shared_vs_specific = `K562+SKNSH+HepG2`/(`K562` + `HepG2` + `SKNSH`))
gnomAD_cCREs_emVar_cat_ratio_shared_vs_specific
## # A tibble: 4 × 9
## ENCODE_cCREs K562 HepG2 SKNSH `K562+HepG2` `K562+SKNSH` `SKNSH+HepG2`
## <fct> <int> <int> <int> <int> <int> <int>
## 1 PLS 48804 5310 26471 8789 15250 11355
## 2 pELS 145101 28200 68595 24328 26640 44703
## 3 dELS 805978 218400 454219 140475 143428 296407
## 4 non-cCRE 1567945 375114 988059 218660 263041 644459
## # ℹ 2 more variables: `K562+SKNSH+HepG2` <int>, shared_vs_specific <dbl>
gnomAD_main_table_full_TF_footprints <- read_tsv("../../results/gnomAD_selection_analysis/gnomAD_TF_footprints_mean_allelic_skew.txt") %>%
mutate(mean_skew_pred_bin = factor(mean_skew_pred_bin, levels =
c("(-Inf, -1.5)", "[-1.5, -1.0)", "[-1.0, -0.5)", "[-0.5, -0.2)",
"[-0.2, -0.05)", "[-0.05, 0.05)", "[0.05, 0.2)", "[0.2, 0.5)",
"[0.5, 1.0)", "[1.0, 1.5)", "[1.5, Inf)"))
) %>%
group_by(mean_skew_pred_bin) %>%
mutate(proportion = count/sum(count)) %>%
ungroup()
## Rows: 22 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): mean_skew_pred_bin
## dbl (1): count
## lgl (1): TF_footprints
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(gnomAD_main_table_full_TF_footprints) +
geom_bar(aes(x = mean_skew_pred_bin, y = proportion, fill = TF_footprints), stat = "identity", position = "fill") +
geom_text(aes(x = mean_skew_pred_bin,
y = proportion + 0.15,
label = ifelse(TF_footprints, sprintf("%.3f", proportion), "")),
size = 3, angle = 90, na.rm = TRUE, color = "white") +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
labs(x = "Mean allelic skew", y = "Proportion", fill = "In TF ChIP-seq") +
theme(aspect.ratio = 1/1.6, strip.background = element_blank()) +
ylim(c(0,1)) +
guides(fill = "none")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_footprints_stack.pdf", scale=0.7)
## Saving 4.9 x 3.5 in image
gnomAD_main_table_full_TF_footprints_gw <- gnomAD_main_table_full_TF_footprints %>%
dplyr::select(TF_footprints, mean_skew_pred_bin, count) %>%
group_by(TF_footprints) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
pivot_wider(names_prefix = "TF_footprints_", names_from = TF_footprints, values_from = count)
gnomAD_main_table_full_TF_footprints_temp <- gnomAD_main_table_full_TF_footprints %>%
dplyr::select(TF_footprints, mean_skew_pred_bin, count) %>%
pivot_wider(names_prefix = "TF_footprints_", names_from = TF_footprints, values_from = count)
gnomAD_main_table_full_TF_footprints_fet <- gnomAD_main_table_full_TF_footprints_temp %>%
rowwise() %>%
mutate(fet_TF_footprints = list(fisher.test(matrix(c(TF_footprints_TRUE+1, TF_footprints_FALSE+1, gnomAD_main_table_full_TF_footprints_gw[["TF_footprints_TRUE"]]-TF_footprints_TRUE+1, gnomAD_main_table_full_TF_footprints_gw[["TF_footprints_FALSE"]]-TF_footprints_FALSE+1), nrow=2)))) %>%
mutate(fet_TF_footprints_pval = fet_TF_footprints$p.val) %>%
mutate(fet_TF_footprints_odds = fet_TF_footprints$estimate) %>%
mutate(fet_TF_footprints_lci = fet_TF_footprints$conf.int[1]) %>%
mutate(fet_TF_footprints_uci = fet_TF_footprints$conf.int[2]) %>%
ungroup() %>%
mutate(fet_TF_footprints_padj = p.adjust(fet_TF_footprints_pval, method = "fdr")) %>%
mutate(fet_TF_footprints_sig = ifelse(fet_TF_footprints_pval < 0.05, "*", ""))
ggplot(gnomAD_main_table_full_TF_footprints_fet %>% filter()) +
geom_bar(aes(x = mean_skew_pred_bin, y = (fet_TF_footprints_odds)), stat = "identity", fill = "#56BCC2") +
geom_errorbar(aes(x = mean_skew_pred_bin, ymin = (fet_TF_footprints_lci), ymax = (fet_TF_footprints_uci)), stat = "identity", width = 0.2) +
geom_text(aes(x = mean_skew_pred_bin, y = (fet_TF_footprints_uci) + 0.5, label = sprintf("%.2f", fet_TF_footprints_odds)), angle = 90, vjust = 0.5, hjust = 0) +
geom_hline(yintercept = 1, color = "black", linetype = "dashed", alpha = 0.5) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
labs(x = "Mean allelic skew", y = "Odds ratio") +
theme(aspect.ratio = 1/1.6, strip.background = element_blank()) +
ylim(c(0, 11))

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_footprints_fet.pdf", scale=0.7)
## Saving 4.9 x 3.5 in image
write_tsv(gnomAD_main_table_full_TF_footprints, "../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_footprints_fet.txt")
gnomAD_main_table_full_TF_ChIP_seq <- read_tsv("../../results/gnomAD_selection_analysis/gnomAD_TF_ChIP_seq_mean_allelic_skew.txt") %>%
mutate(mean_skew_pred_bin = factor(mean_skew_pred_bin, levels =
c("(-Inf, -1.5)", "[-1.5, -1.0)", "[-1.0, -0.5)", "[-0.5, -0.2)",
"[-0.2, -0.05)", "[-0.05, 0.05)", "[0.05, 0.2)", "[0.2, 0.5)",
"[0.5, 1.0)", "[1.0, 1.5)", "[1.5, Inf)"))
) %>%
group_by(mean_skew_pred_bin) %>%
mutate(proportion = count/sum(count)) %>%
ungroup()
## Rows: 22 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): mean_skew_pred_bin
## dbl (1): count
## lgl (1): TF_ChIP_seq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(gnomAD_main_table_full_TF_ChIP_seq) +
geom_bar(aes(x = mean_skew_pred_bin, y = proportion, fill = TF_ChIP_seq), stat = "identity", position = "fill") +
geom_text(aes(x = mean_skew_pred_bin,
y = proportion + 0.15,
label = ifelse(TF_ChIP_seq, sprintf("%.3f", proportion), "")),
size = 3, angle = 90, na.rm = TRUE, color = "white") +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
labs(x = "Mean allelic skew", y = "Proportion", fill = "In TF ChIP-seq") +
theme(aspect.ratio = 1/1.6, strip.background = element_blank()) +
ylim(c(0,1)) +
guides(fill = "none")

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_ChIP_seq_stack.pdf", scale=0.7)
## Saving 4.9 x 3.5 in image
gnomAD_main_table_full_TF_ChIP_seq_gw <- gnomAD_main_table_full_TF_ChIP_seq %>%
dplyr::select(TF_ChIP_seq, mean_skew_pred_bin, count) %>%
group_by(TF_ChIP_seq) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
pivot_wider(names_prefix = "TF_ChIP_seq_", names_from = TF_ChIP_seq, values_from = count)
gnomAD_main_table_full_TF_ChIP_seq_temp <- gnomAD_main_table_full_TF_ChIP_seq %>%
dplyr::select(TF_ChIP_seq, mean_skew_pred_bin, count) %>%
pivot_wider(names_prefix = "TF_ChIP_seq_", names_from = TF_ChIP_seq, values_from = count)
gnomAD_main_table_full_TF_ChIP_seq_fet <- gnomAD_main_table_full_TF_ChIP_seq_temp %>%
rowwise() %>%
mutate(fet_TF_ChIP_seq = list(fisher.test(matrix(c(TF_ChIP_seq_TRUE+1, TF_ChIP_seq_FALSE+1, gnomAD_main_table_full_TF_ChIP_seq_gw[["TF_ChIP_seq_TRUE"]]-TF_ChIP_seq_TRUE+1, gnomAD_main_table_full_TF_ChIP_seq_gw[["TF_ChIP_seq_FALSE"]]-TF_ChIP_seq_FALSE+1), nrow=2)))) %>%
mutate(fet_TF_ChIP_seq_pval = fet_TF_ChIP_seq$p.val) %>%
mutate(fet_TF_ChIP_seq_odds = fet_TF_ChIP_seq$estimate) %>%
mutate(fet_TF_ChIP_seq_lci = fet_TF_ChIP_seq$conf.int[1]) %>%
mutate(fet_TF_ChIP_seq_uci = fet_TF_ChIP_seq$conf.int[2]) %>%
ungroup() %>%
mutate(fet_TF_ChIP_seq_padj = p.adjust(fet_TF_ChIP_seq_pval, method = "fdr")) %>%
mutate(fet_TF_ChIP_seq_sig = ifelse(fet_TF_ChIP_seq_pval < 0.05, "*", ""))
ggplot(gnomAD_main_table_full_TF_ChIP_seq_fet %>% filter()) +
geom_bar(aes(x = mean_skew_pred_bin, y = (fet_TF_ChIP_seq_odds)), stat = "identity", fill = "#56BCC2") +
geom_errorbar(aes(x = mean_skew_pred_bin, ymin = (fet_TF_ChIP_seq_lci), ymax = (fet_TF_ChIP_seq_uci)), stat = "identity", width = 0.2) +
geom_text(aes(x = mean_skew_pred_bin, y = (fet_TF_ChIP_seq_uci) + 0.5, label = sprintf("%.2f", fet_TF_ChIP_seq_odds)), angle = 90, vjust = 0.5, hjust = 0) +
geom_hline(yintercept = 1, color = "black", linetype = "dashed", alpha = 0.5) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
labs(x = "Mean allelic skew", y = "Odds ratio") +
theme(aspect.ratio = 1/1.6, strip.background = element_blank()) +
ylim(c(0, 11))

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_ChIP_seq_fet.pdf", scale=0.7)
## Saving 4.9 x 3.5 in image
write_tsv(gnomAD_main_table_full_TF_ChIP_seq, "../../results/gnomAD_selection_analysis/gnomAD_main_table_full_TF_ChIP_seq_fet.txt")
ggplot(gnomAD_main_table_full_cCREs_pleio %>% filter(ENCODE_cCREs != "Other cCREs") %>% mutate(pleio = as.factor(pleio))) +
geom_hline(yintercept = 0, color = "black") +
geom_bar(aes(pleio, phyloP_mean, fill = pleio), stat = "identity") +
geom_errorbar(aes(pleio, ymin = phyloP_lci, ymax = phyloP_uci), stat = "identity", width = 0.2) +
cowplot::theme_cowplot() +
theme(axis.line.x = element_blank()) +
scale_fill_viridis_d() +
scale_color_viridis_d() +
labs(x = "emVar pleiotropy", y = "Mean constraint (phyloP)") +
guides(fill = "none") +
facet_wrap(~ENCODE_cCREs) +
theme(aspect.ratio = 1, strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_pleio_phyloP.pdf", scale=1)
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_mean_ref %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = mean_ref_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = mean_ref_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = mean_ref_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "Mean ref activitiy", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 1/0.99)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_ref_phyloP.pdf")
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = mean_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = mean_skew_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = mean_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "Mean allelic skew", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 1/0.99)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_phyloP.pdf")
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = mean_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = mean_skew_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = mean_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "Mean allelic skew", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 3/2)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_phyloP_skinny.pdf", scale = 1)
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_K562 %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = K562_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = K562_skew_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = K562_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "K562 allelic skew", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 1/0.99)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_K562_phyloP.pdf")
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_HepG2 %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = HepG2_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = HepG2_skew_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = HepG2_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "HepG2 allelic skew", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 1/0.99)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_HepG2_phyloP.pdf")
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_SKNSH %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_hline(yintercept = 0, color = "black") +
geom_ribbon(aes(x = SKNSH_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, fill = ENCODE_cCREs, group = ENCODE_cCREs), position = position_dodge(width = 0), alpha = 0.5) +
geom_point(aes(x = SKNSH_skew_pred_bin, y = phyloP_mean, color = ENCODE_cCREs), position = position_dodge(width = 0)) +
geom_errorbar(aes(x = SKNSH_skew_pred_bin, ymin = phyloP_lci, ymax = phyloP_uci, color = ENCODE_cCREs), width = 0.5, position = position_dodge(width = 0)) +
cowplot::theme_cowplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.line.x = element_blank()) +
scale_fill_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
scale_color_manual(values = c("#3E0751", "#405187", "#468E8B", "#79C66E", "grey50")) +
labs(x = "SKNSH allelic skew", y = "Mean constraint (phyloP)", color = "ENCODE cCREs") +
guides(fill = "none") +
theme(aspect.ratio = 1/0.99)

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_SKNSH_phyloP.pdf")
## Saving 7 x 5 in image
ggplot(gnomAD_main_table_full_cCREs_mean_category %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_tile(aes(x = mean_skew_pred_bin, y = category_copy, fill = phyloP_mean)) +
geom_text(aes(x = mean_skew_pred_bin, y = category_copy,
label = round(phyloP_mean, 2)),
color = "white", size = 2) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.line = element_blank()) +
scale_fill_viridis_c() +
labs(x = "Mean allelic skew", y = "ENCODE cCREs", fill = "Mean constraint (phyloP)") +
facet_grid(rows = vars(ENCODE_cCREs)) +
coord_fixed() + # Ensures square tiles
theme(strip.background = element_blank())
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_mean_category_count_heat.pdf")
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggplot(gnomAD_main_table_full_cCREs_category %>% filter(ENCODE_cCREs != "Other cCREs")) +
geom_tile(aes(x = 1, y = category_copy, fill = phyloP_mean)) +
geom_text(aes(x = 1, y = category_copy,
label = round(phyloP_mean, 2)),
color = "white", size = 2) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.line = element_blank()) +
scale_fill_viridis_c(limits = c(
min(gnomAD_main_table_full_cCREs_mean_category$phyloP_mean, na.rm = T),
max(gnomAD_main_table_full_cCREs_mean_category$phyloP_mean, na.rm = T))) +
labs(x = "", y = "ENCODE cCREs", fill = "Mean constraint (phyloP)") +
facet_grid(rows = vars(ENCODE_cCREs)) +
coord_fixed() + # Ensures square tiles
theme(strip.background = element_blank())

ggsave("../../results/gnomAD_selection_analysis/gnomAD_main_table_full_cCREs_category_count_heat.pdf")
## Saving 7 x 5 in image
gnomAD_main_table_full_rare_common_phyloP_mean <- gnomAD_main_table_full_cCREs_category %>%
mutate(category_copy_temp = ifelse(category_copy %in% c("SINGLETON", "ULTRARARE", "RARE"), "RARE", "COMMON")) %>%
group_by(category_copy_temp) %>%
summarise(
sum_count = sum(count),
sum_phyloP = sum(count * phyloP_mean)
) %>%
ungroup()
filter(gnomAD_main_table_full_rare_common_phyloP_mean, category_copy_temp == "RARE")$sum_phyloP/
filter(gnomAD_main_table_full_rare_common_phyloP_mean, category_copy_temp == "RARE")$sum_count
## [1] -0.07896777
filter(gnomAD_main_table_full_rare_common_phyloP_mean, category_copy_temp == "COMMON")$sum_phyloP/
filter(gnomAD_main_table_full_rare_common_phyloP_mean, category_copy_temp == "COMMON")$sum_count
## [1] -0.3016857
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "PLS", mean_skew_pred_bin == "(-Inf, -1.5)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, fet_RARE_COMMON_odds, fet_RARE_COMMON_lci, fet_RARE_COMMON_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin fet_RARE_COMMON_odds fet_RARE_COMMON_lci
## <fct> <fct> <dbl> <dbl>
## 1 PLS (-Inf, -1.5) 1.84 1.52
## # ℹ 1 more variable: fet_RARE_COMMON_uci <dbl>
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "PLS", mean_skew_pred_bin == "[1.0, 1.5)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, fet_RARE_COMMON_odds, fet_RARE_COMMON_lci, fet_RARE_COMMON_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin fet_RARE_COMMON_odds fet_RARE_COMMON_lci
## <fct> <fct> <dbl> <dbl>
## 1 PLS [1.0, 1.5) 1.43 1.20
## # ℹ 1 more variable: fet_RARE_COMMON_uci <dbl>
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "PLS", mean_skew_pred_bin == "[1.5, Inf)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, fet_RARE_COMMON_odds, fet_RARE_COMMON_lci, fet_RARE_COMMON_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin fet_RARE_COMMON_odds fet_RARE_COMMON_lci
## <fct> <fct> <dbl> <dbl>
## 1 PLS [1.5, Inf) 1.20 0.850
## # ℹ 1 more variable: fet_RARE_COMMON_uci <dbl>
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "PLS", mean_skew_pred_bin == "(-Inf, -1.5)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 PLS (-Inf, -1.5) 3.25 3.14 3.36
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "pELS", mean_skew_pred_bin == "(-Inf, -1.5)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 pELS (-Inf, -1.5) 0.868 0.798 0.938
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "dELS", mean_skew_pred_bin == "(-Inf, -1.5)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 dELS (-Inf, -1.5) -0.0525 -0.0717 -0.0333
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "PLS", mean_skew_pred_bin == "[1.5, Inf)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 PLS [1.5, Inf) 0.657 0.492 0.821
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "pELS", mean_skew_pred_bin == "[1.5, Inf)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 pELS [1.5, Inf) 0.184 0.130 0.237
gnomAD_main_table_full_cCREs_mean %>% filter(ENCODE_cCREs == "dELS", mean_skew_pred_bin == "[1.5, Inf)") %>% dplyr::select(ENCODE_cCREs, mean_skew_pred_bin, phyloP_mean, phyloP_lci, phyloP_uci)
## # A tibble: 1 × 5
## ENCODE_cCREs mean_skew_pred_bin phyloP_mean phyloP_lci phyloP_uci
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 dELS [1.5, Inf) -0.152 -0.173 -0.131